## pval_cutoff: 0.05
## lfc_cutoff: 0.585
## low_counts_cutoff: 10
General statistics
# Number of samples
length(counts_data)
## [1] 6
# Number of genes
nrow(counts_data)
## [1] 43432
# Total counts
colSums(counts_data)
## SRR15202006 SRR15202007 SRR15202008 SRR15202009 SRR15202010 SRR15202011
## 12633373 11169811 11283591 10889859 11241553 8610959

Create DDS objects
# Create DESeqDataSet object
dds <- get_DESeqDataSet_obj(counts_data, ~ treatment)
## [1] TRUE
## [1] TRUE
## [1] "DESeqDataSet object of length 43432 with 0 metadata columns"
## [1] "DESeqDataSet object of length 17151 with 1 metadata column"
colData(dds)
## DataFrame with 6 rows and 2 columns
## treatment label
## <factor> <character>
## SRR15202006 control control
## SRR15202007 control control
## SRR15202008 control control
## SRR15202009 diabetes diabetes
## SRR15202010 diabetes diabetes
## SRR15202011 diabetes diabetes
Sample-to-sample comparisons
# Transform data (blinded rlog)
rld <- get_transformed_data(dds)
PCA plot
pca <- rld$pca
pca_df <- cbind(as.data.frame(colData(dds)) %>% rownames_to_column(var = 'name'), pca$x)
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 7.7864 6.3073 4.4317 3.8287 3.39901 5.78e-14
## Proportion of Variance 0.4145 0.2720 0.1343 0.1002 0.07899 0.00e+00
## Cumulative Proportion 0.4145 0.6865 0.8208 0.9210 1.00000 1.00e+00
ggplot(pca_df, aes(x = PC1, y = PC2, color = label)) +
geom_point() +
geom_text(aes(label = name), position = position_nudge(y = -2), show.legend = F, size = 3) +
scale_color_manual(values = colors_default) +
scale_x_continuous(expand = c(0.2, 0))

Correlation heatmap
pheatmap(
cor(rld$matrix),
annotation_col = as.data.frame(colData(dds)) %>% select(label),
color = brewer.pal(8, 'YlOrRd')
)
Wald test results
# DE analysis using Wald test
dds_full <- DESeq(dds)
colData(dds_full)
## DataFrame with 6 rows and 3 columns
## treatment label sizeFactor
## <factor> <character> <numeric>
## SRR15202006 control control 1.2043777425007
## SRR15202007 control control 1.03214138772615
## SRR15202008 control control 1.0251262440296
## SRR15202009 diabetes diabetes 1.01640230632695
## SRR15202010 diabetes diabetes 1.02730614790438
## SRR15202011 diabetes diabetes 0.762996097993412
# Wald test results
res <- results(
dds_full,
contrast = c('treatment', condition, control),
alpha = pval_cutoff
)
res
## log2 fold change (MLE): treatment diabetes vs control
## Wald test p-value: treatment diabetes vs control
## DataFrame with 17151 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000051951 2.95089644249448 0.279703253177714 1.1942921557048 0.23420002538043 0.814829695315352 NA
## ENSMUSG00000025900 7.97288199250445 -2.1413276855426 3.30647902913793 -0.647615686254903 0.517233549093975 0.95431691681871
## ENSMUSG00000033845 601.463532980449 -0.0187530775381286 0.131828792547704 -0.142253275446959 0.886879951897741 0.997519245008607
## ENSMUSG00000102275 11.3733935493765 0.12713742481666 0.616187733155848 0.206329042231199 0.836533894147812 0.995909456931251
## ENSMUSG00000025903 784.45487767562 -0.332543444906407 0.136982875966564 -2.42762785172927 0.0151979290838798 0.239832708375858
## ... ... ... ... ... ... ...
## ENSMUSG00000099876 42.6820796468785 0.204126888413843 0.345932722058393 0.590076842685575 0.555139133309812 0.959866006696008
## ENSMUSG00000068457 262.277684372029 -0.402217870623922 0.195006503002258 -2.06258696213462 0.0391518875226545 0.408270703244683
## ENSMUSG00000069045 765.281216049028 -0.194584983294208 0.123371783854282 -1.57722436374948 0.114743908862716 0.644345025195248
## ENSMUSG00000101059 4.62892395099965 -0.766518023576525 1.02049086498626 -0.751126786016691 0.452576356674798 NA
## ENSMUSG00000096768 223.287123468951 -0.472724015123087 0.694267062556458 -0.680896503115679 0.49593698091859 0.952824493957555
mcols(res)
## DataFrame with 6 rows and 2 columns
## type description
## <character> <character>
## baseMean intermediate mean of normalized counts for all samples
## log2FoldChange results log2 fold change (MLE): treatment diabetes vs control
## lfcSE results standard error: treatment diabetes vs control
## stat results Wald statistic: treatment diabetes vs control
## pvalue results Wald test p-value: treatment diabetes vs control
## padj results BH adjusted p-values
summary(res)
##
## out of 17151 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 220, 1.3%
## LFC < 0 (down) : 166, 0.97%
## outliers [1] : 5, 0.029%
## low counts [2] : 2328, 14%
## (mean count < 5)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotDispEsts(dds_full)

Summary details
# Upregulated genes (LFC > 0)
res_sig_df %>% filter(log2FoldChange > 0)
# Downregulated genes (LFC < 0)
res_sig_df %>% filter(log2FoldChange < 0)
# Outliers (pvalue and padj are NA)
res[which(is.na(res$pvalue)), ]
## log2 fold change (MLE): treatment diabetes vs control
## Wald test p-value: treatment diabetes vs control
## DataFrame with 5 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000058207 13.5434410931545 -6.11312772840901 3.35524067894612 -1.82196400001002 NA NA
## ENSMUSG00000027360 43.9821595174391 2.26097698717336 1.47671996568283 1.53108039419504 NA NA
## ENSMUSG00000029368 113.385059233787 -6.56226311685177 2.43612373943613 -2.69373144336695 NA NA
## ENSMUSG00000030324 111.996189433555 -1.67519555819802 1.72582377649263 -0.970664317536808 NA NA
## ENSMUSG00000034837 44.1043097621375 -1.47703917059914 1.66797260027614 -0.885529636610701 NA NA
# Low counts (only padj is NA)
res[which(is.na(res$padj) & !is.na(res$pvalue)), ]
## log2 fold change (MLE): treatment diabetes vs control
## Wald test p-value: treatment diabetes vs control
## DataFrame with 2328 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000051951 2.95089644249448 0.279703253177714 1.1942921557048 0.23420002538043 0.814829695315352 NA
## ENSMUSG00000062588 1.55595973449408 -0.41982114986208 1.65686392903135 -0.253382998148507 0.799972264585819 NA
## ENSMUSG00000097797 2.82656993270396 0.0206644731532153 1.20773714432366 0.0171100750277806 0.986348801379757 NA
## ENSMUSG00000076135 1.71888632802706 -1.26246739521251 1.7210899571685 -0.733527837957695 0.463236555450097 NA
## ENSMUSG00000079671 2.75021741537875 3.01565675609882 1.49176938258954 2.02153013146307 0.0432249164488134 NA
## ... ... ... ... ... ... ...
## ENSMUSG00000084920 3.39025145768402 -0.102444686102827 1.16975662279002 -0.0875777782377359 0.930212264699795 NA
## ENSMUSG00000084806 2.03762357760336 -0.269310715993311 1.41818928273415 -0.189897582270615 0.849389387066993 NA
## ENSMUSG00000049176 1.8519494382504 -0.0168883280312025 1.45128605944502 -0.011636801663802 0.990715385162057 NA
## ENSMUSG00000087159 3.24778701734495 -0.299778227099337 1.22269964111491 -0.245177324846507 0.80631913280765 NA
## ENSMUSG00000101059 4.62892395099965 -0.766518023576525 1.02049086498626 -0.751126786016691 0.452576356674798 NA
Shrunken LFC results
plotMA(res)

# Shrunken LFC results
res_shrunken <- lfcShrink(
dds_full,
coef = str_c('treatment_', condition, '_vs_', control),
type = 'apeglm'
)
res_shrunken
## log2 fold change (MAP): treatment diabetes vs control
## Wald test p-value: treatment diabetes vs control
## DataFrame with 17151 rows and 5 columns
## baseMean log2FoldChange lfcSE pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000051951 2.95089644249448 0.00187171449887882 0.0971667441043587 0.814829695315352 NA
## ENSMUSG00000025900 7.97288199250445 -0.00158853789545442 0.0974559781761649 0.517233549093975 NA
## ENSMUSG00000033845 601.463532980449 -0.00604166955756916 0.0785063883444738 0.886879951897741 0.995641175088391
## ENSMUSG00000102275 11.3733935493765 0.00313091597085892 0.0963427725847407 0.836533894147812 0.990912363450942
## ENSMUSG00000025903 784.45487767562 -0.206896515643857 0.156542098108894 0.0151979290838798 0.226347605461533
## ... ... ... ... ... ...
## ENSMUSG00000099876 42.6820796468785 0.0152742607975724 0.0954221087620556 0.555139133309812 0.944317287566142
## ENSMUSG00000068457 262.277684372029 -0.127704922927983 0.180991591127442 0.0391518875226545 0.385683455912078
## ENSMUSG00000069045 765.281216049028 -0.0919261403692224 0.103307102755589 0.114743908862716 0.618082453181188
## ENSMUSG00000101059 4.62892395099965 -0.00696388891517158 0.0973919512573816 0.452576356674798 NA
## ENSMUSG00000096768 223.287123468951 -0.00906694965251425 0.0971539452206115 0.49593698091859 0.935226118158635
plotMA(res_shrunken)

mcols(res_shrunken)
## DataFrame with 5 rows and 2 columns
## type description
## <character> <character>
## baseMean intermediate mean of normalized counts for all samples
## log2FoldChange results log2 fold change (MAP): treatment diabetes vs control
## lfcSE results posterior SD: treatment diabetes vs control
## pvalue results Wald test p-value: treatment diabetes vs control
## padj results BH adjusted p-values
summary(res_shrunken, alpha = pval_cutoff)
##
## out of 17151 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 219, 1.3%
## LFC < 0 (down) : 167, 0.97%
## outliers [1] : 5, 0.029%
## low counts [2] : 3325, 19%
## (mean count < 8)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
Summary details
# Upregulated genes (LFC > 0)
res_shrunken_sig_df %>% filter(log2FoldChange > 0)
# Downregulated genes (LFC < 0)
res_shrunken_sig_df %>% filter(log2FoldChange < 0)
# Outliers (pvalue and padj are NA)
res_shrunken[which(is.na(res_shrunken$pvalue)), ]
## log2 fold change (MAP): treatment diabetes vs control
## Wald test p-value: treatment diabetes vs control
## DataFrame with 5 rows and 5 columns
## baseMean log2FoldChange lfcSE pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000058207 13.5434410931545 -0.00268899436862778 0.0975156613611566 NA NA
## ENSMUSG00000027360 43.9821595174391 0.0084195352379722 0.0978812781292859 NA NA
## ENSMUSG00000029368 113.385059233787 -0.00476873883780973 0.0976344681684895 NA NA
## ENSMUSG00000030324 111.996189433555 -0.00479275678924692 0.097525162140546 NA NA
## ENSMUSG00000034837 44.1043097621375 -0.00464703169330119 0.0974973368189546 NA NA
# Low counts (only padj is NA)
res_shrunken[which(is.na(res_shrunken$padj) & !is.na(res_shrunken$pvalue)), ]
## log2 fold change (MAP): treatment diabetes vs control
## Wald test p-value: treatment diabetes vs control
## DataFrame with 3325 rows and 5 columns
## baseMean log2FoldChange lfcSE pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000051951 2.95089644249448 0.00187171449887882 0.0971667441043587 0.814829695315352 NA
## ENSMUSG00000025900 7.97288199250445 -0.00158853789545442 0.0974559781761649 0.517233549093975 NA
## ENSMUSG00000062588 1.55595973449408 -0.00146116856002583 0.0973092974292943 0.799972264585819 NA
## ENSMUSG00000102135 6.74676645653609 -0.00634244187437729 0.0970827486929454 0.579524489126369 NA
## ENSMUSG00000103509 6.69317248987611 -0.00234133032591324 0.0969429700990804 0.818834025004068 NA
## ... ... ... ... ... ...
## ENSMUSG00000044583 7.56298447214514 0.0214352675831036 0.100274027026307 0.0729895707077782 NA
## ENSMUSG00000049176 1.8519494382504 -6.65621866325914e-05 0.0972451929753862 0.990715385162057 NA
## ENSMUSG00000087159 3.24778701734495 -0.00186927340549154 0.0971818623139033 0.80631913280765 NA
## ENSMUSG00000072844 5.19664226484586 -0.0102530689047531 0.0978068110600661 0.282769897861314 NA
## ENSMUSG00000101059 4.62892395099965 -0.00696388891517158 0.0973919512573816 0.452576356674798 NA
Visualizing results
Heatmaps
# Plot normalized counts (z-scores)
pheatmap(counts_sig_norm[2:7],
color = brewer.pal(8, 'YlOrRd'),
cluster_rows = T,
show_rownames = F,
annotation_col = as.data.frame(colData(dds)) %>% select(label),
border_color = NA,
fontsize = 10,
scale = 'row',
fontsize_row = 10,
height = 20)
# Plot log-transformed counts
pheatmap(counts_sig_log[2:7],
color = rev(brewer.pal(8, 'RdYlBu')),
cluster_rows = T,
show_rownames = F,
annotation_col = as.data.frame(colData(dds)) %>% select(label),
border_color = NA,
fontsize = 10,
fontsize_row = 10,
height = 20)


# Plot log-transformed counts (top 24 DE genes)
pheatmap((counts_sig_log %>% filter(ensembl_gene_id %in% res_sig_df$ensembl_gene_id[1:24]))[2:7],
color = rev(brewer.pal(8, 'RdYlBu')),
cluster_rows = T,
show_rownames = F,
annotation_col = as.data.frame(colData(dds)) %>% select(label),
fontsize = 10,
fontsize_row = 10,
height = 20)

Volcano plots
# Unshrunken LFC
res_df %>%
mutate(
sig_threshold = if_else(
padj < pval_cutoff & abs(log2FoldChange) >= lfc_cutoff,
if_else(log2FoldChange > 0, 'DE-up', 'DE-down'),
'non-DE'
)
) %>%
filter(!is.na(sig_threshold)) %>%
ggplot() +
geom_point(aes(x = log2FoldChange, y = -log10(padj), colour = sig_threshold)) +
scale_color_manual(values = c('blue', 'red', 'gray')) +
xlab('log2 fold change') +
ylab('-log10 adjusted p-value')

# Shrunken LFC
res_shrunken_df %>%
mutate(
sig_threshold = if_else(
padj < pval_cutoff & abs(log2FoldChange) >= lfc_cutoff,
if_else(log2FoldChange > 0, 'DE-up', 'DE-down'),
'non-DE'
)
) %>%
filter(!is.na(sig_threshold)) %>%
ggplot() +
geom_point(aes(x = log2FoldChange, y = -log10(padj), colour = sig_threshold)) +
scale_color_manual(values = c('blue', 'red', 'gray')) +
xlab('log2 fold change') +
ylab('-log10 adjusted p-value')

GSEA (all)
Hallmark genesets
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_h) %>% plot_enrichment_table(rank_lfc, mm_h)

# Wald stat
get_fgsea_res(rank_stat, mm_h) %>% plot_enrichment_table(rank_stat, mm_h)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_h) %>% plot_enrichment_table(rank_pval, mm_h)

GO biological process
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_c5_bp) %>% plot_enrichment_table(rank_lfc, mm_c5_bp)

# Wald stat
get_fgsea_res(rank_stat, mm_c5_bp) %>% plot_enrichment_table(rank_stat, mm_c5_bp)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_c5_bp) %>% plot_enrichment_table(rank_pval, mm_c5_bp)

GO cellular component
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_c5_cc) %>% plot_enrichment_table(rank_lfc, mm_c5_cc)

# Wald stat
get_fgsea_res(rank_stat, mm_c5_cc) %>% plot_enrichment_table(rank_stat, mm_c5_cc)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_c5_cc) %>% plot_enrichment_table(rank_pval, mm_c5_cc)

GO molecular function
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_c5_mf) %>% plot_enrichment_table(rank_lfc, mm_c5_mf)

# Wald stat
get_fgsea_res(rank_stat, mm_c5_mf) %>% plot_enrichment_table(rank_stat, mm_c5_mf)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_c5_mf) %>% plot_enrichment_table(rank_pval, mm_c5_mf)

GSEA (DE)
Hallmark genesets
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_h) %>% plot_enrichment_table(rank_lfc, mm_h)

# Wald stat
get_fgsea_res(rank_stat, mm_h) %>% plot_enrichment_table(rank_stat, mm_h)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_h) %>% plot_enrichment_table(rank_pval, mm_h)

GO biological process
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_c5_bp) %>% plot_enrichment_table(rank_lfc, mm_c5_bp)

# Wald stat
get_fgsea_res(rank_stat, mm_c5_bp) %>% plot_enrichment_table(rank_stat, mm_c5_bp)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_c5_bp) %>% plot_enrichment_table(rank_pval, mm_c5_bp)

GO cellular component
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_c5_cc) %>% plot_enrichment_table(rank_lfc, mm_c5_cc)

# Wald stat
get_fgsea_res(rank_stat, mm_c5_cc) %>% plot_enrichment_table(rank_stat, mm_c5_cc)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_c5_cc) %>% plot_enrichment_table(rank_pval, mm_c5_cc)

GO molecular function
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_c5_mf) %>% plot_enrichment_table(rank_lfc, mm_c5_mf)

# Wald stat
get_fgsea_res(rank_stat, mm_c5_mf) %>% plot_enrichment_table(rank_stat, mm_c5_mf)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_c5_mf) %>% plot_enrichment_table(rank_pval, mm_c5_mf)

System Info
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Sierra 10.12.6
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggrepel_0.9.1 readxl_1.3.1 multiMiR_1.8.0 VennDiagram_1.6.20 futile.logger_1.4.3 fgsea_1.12.0 Rcpp_1.0.3 RColorBrewer_1.1-2 pheatmap_1.0.12 DESeq2_1.26.0 SummarizedExperiment_1.16.1 DelayedArray_0.12.3 BiocParallel_1.20.1 matrixStats_0.57.0 Biobase_2.46.0 GenomicRanges_1.38.0 GenomeInfoDb_1.22.1 IRanges_2.20.2 S4Vectors_0.24.4 BiocGenerics_0.32.0 scales_1.1.1 forcats_0.4.0 stringr_1.4.0 dplyr_1.0.2 purrr_0.3.3 readr_1.3.1 tidyr_1.0.0 tibble_3.1.0 ggplot2_3.3.3 tidyverse_1.2.1
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.4-1 ellipsis_0.3.0 htmlTable_1.13.3 XVector_0.26.0 base64enc_0.1-3 rstudioapi_0.10 farver_2.1.0 bit64_0.9-7 mvtnorm_1.1-1 apeglm_1.8.0 AnnotationDbi_1.48.0 fansi_0.4.0 lubridate_1.7.4 xml2_1.2.2 splines_3.6.3 geneplotter_1.64.0 knitr_1.25 Formula_1.2-3 jsonlite_1.6 broom_0.7.5 annotate_1.64.0 cluster_2.1.0 png_0.1-7 compiler_3.6.3 httr_1.4.1 backports_1.1.5 assertthat_0.2.1 Matrix_1.2-18 cli_1.1.0 formatR_1.7 acepack_1.4.1 htmltools_0.5.1.1 tools_3.6.3 coda_0.19-3 gtable_0.3.0 glue_1.4.2 GenomeInfoDbData_1.2.2 fastmatch_1.1-0 bbmle_1.0.23.1 cellranger_1.1.0 jquerylib_0.1.3 vctrs_0.3.4 xfun_0.22 rvest_0.3.5 lifecycle_0.2.0 XML_3.99-0.3 MASS_7.3-51.5 zlibbioc_1.32.0 hms_0.5.2 lambda.r_1.2.4 yaml_2.2.0 memoise_1.1.0 gridExtra_2.3 emdbook_1.3.12 sass_0.3.1 bdsmatrix_1.3-4 rpart_4.1-15 latticeExtra_0.6-29 stringi_1.4.3 RSQLite_2.2.1 genefilter_1.68.0 checkmate_1.9.4 rlang_0.4.8 pkgconfig_2.0.3 bitops_1.0-6 evaluate_0.14 lattice_0.20-38 labeling_0.3 htmlwidgets_1.5.1 bit_1.1-15.1 tidyselect_1.1.0 plyr_1.8.4 magrittr_1.5 R6_2.4.0 generics_0.0.2 Hmisc_4.3-0 DBI_1.1.0 pillar_1.5.1 haven_2.2.0 foreign_0.8-75 withr_2.1.2 survival_3.1-8 RCurl_1.95-4.12 nnet_7.3-12 modelr_0.1.5 crayon_1.3.4 futile.options_1.0.1 utf8_1.1.4 rmarkdown_2.7 jpeg_0.1-8.1 locfit_1.5-9.4 data.table_1.13.6 blob_1.2.1 digest_0.6.27 xtable_1.8-4 numDeriv_2016.8-1.1 munsell_0.5.0 bslib_0.2.4